Package Bio :: Package Restriction :: Module Restriction
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.Restriction

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
  10   
  11  """Restriction Enzyme classes. 
  12   
  13  Notes about the various classes of the restriction enzyme implementation:: 
  14   
  15              RestrictionType is the type of all restriction enzymes. 
  16          ---------------------------------------------------------------------------- 
  17              AbstractCut implements some methods that are common to all enzymes. 
  18          ---------------------------------------------------------------------------- 
  19              NoCut, OneCut,TwoCuts   represent the number of double strand cuts 
  20                                      produced by the enzyme. 
  21                                      they correspond to the 4th field of the 
  22                                      rebase record emboss_e.NNN. 
  23                      0->NoCut    : the enzyme is not characterised. 
  24                      2->OneCut   : the enzyme produces one double strand cut. 
  25                      4->TwoCuts  : two double strand cuts. 
  26          ---------------------------------------------------------------------------- 
  27              Meth_Dep, Meth_Undep    represent the methylation susceptibility to 
  28                                      the enzyme. 
  29                                      Not implemented yet. 
  30          ---------------------------------------------------------------------------- 
  31              Palindromic,            if the site is palindromic or not. 
  32              NotPalindromic          allow some optimisations of the code. 
  33                                      No need to check the reverse strand 
  34                                      with palindromic sites. 
  35          ---------------------------------------------------------------------------- 
  36              Unknown, Blunt,         represent the overhang. 
  37              Ov5, Ov3                Unknown is here for symmetry reasons and 
  38                                      correspond to enzymes that are not 
  39                                      characterised in rebase. 
  40          ---------------------------------------------------------------------------- 
  41              Defined, Ambiguous,     represent the sequence of the overhang. 
  42              NotDefined 
  43                                      NotDefined is for enzymes not characterised 
  44                                      in rebase. 
  45   
  46                                      Defined correspond to enzymes that display 
  47                                      a constant overhang whatever the sequence. 
  48                                      ex : EcoRI. G^AATTC -> overhang :AATT 
  49                                                  CTTAA^G 
  50   
  51                                      Ambiguous : the overhang varies with the 
  52                                      sequence restricted. 
  53                                      Typically enzymes which cut outside their 
  54                                      restriction site or (but not always) 
  55                                      inside an ambiguous site. 
  56                                      ex: 
  57                                      AcuI CTGAAG(22/20)  -> overhang : NN 
  58                                      AasI GACNNN^NNNGTC  -> overhang : NN 
  59                                           CTGN^NNNNNCAG 
  60   
  61                  note : these 3 classes refer to the overhang, not the site. 
  62                     So the enzyme ApoI (RAATTY) is defined even if its 
  63                     restriction site is ambiguous. 
  64   
  65                          ApoI R^AATTY -> overhang : AATT -> Defined 
  66                               YTTAA^R 
  67                     Accordingly, blunt enzymes are always Defined even 
  68                     when they cut outside their restriction site. 
  69          ---------------------------------------------------------------------------- 
  70              Not_available,          as found in rebase file emboss_r.NNN files. 
  71              Commercially_available 
  72                                      allow the selection of the enzymes 
  73                                      according to their suppliers to reduce the 
  74                                      quantity of results. 
  75                                      Also will allow the implementation of 
  76                                      buffer compatibility tables. Not 
  77                                      implemented yet. 
  78   
  79                                      the list of suppliers is extracted from 
  80                                      emboss_s.NNN 
  81          ---------------------------------------------------------------------------- 
  82   
  83  """ 
  84   
  85  from __future__ import print_function 
  86   
  87  import warnings 
  88   
  89  from Bio._py3k import zip 
  90  from Bio._py3k import filter 
  91  from Bio._py3k import range 
  92   
  93  import re 
  94  import itertools 
  95   
  96  from Bio.Seq import Seq, MutableSeq 
  97  from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict 
  98  from Bio.Restriction.Restriction_Dictionary import typedict 
  99  from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict 
 100  from Bio.Restriction.PrintFormat import PrintFormat 
 101  from Bio import BiopythonWarning 
102 103 104 # Used to use Bio.Restriction.DNAUtils.check_bases (and expose it under this 105 # namespace), but have deprecated that module. 106 107 108 -def _check_bases(seq_string):
109 """Check characters in a string (PRIVATE). 110 111 Remove digits and white space present in string. Allows any valid ambiguous 112 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted). 113 114 Other characters (e.g. symbols) trigger a TypeError. 115 116 Returns the string WITH A LEADING SPACE (!). This is for backwards 117 compatibility, and may in part be explained by the fact that 118 Bio.Restriction doesn't use zero based counting. 119 """ 120 # Remove white space and make upper case: 121 seq_string = "".join(seq_string.split()).upper() 122 # Remove digits 123 for c in "0123456789": 124 seq_string = seq_string.replace(c, "") 125 # Check only allowed IUPAC letters 126 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")): 127 raise TypeError("Invalid character found in %s" % repr(seq_string)) 128 return " " + seq_string
# Lookup table keyed by IUPAC nucleotide code. Each value is a string of
# IUPAC codes; presumably the codes considered compatible with the key
# (NOTE(review): confirm against the code that consumes this table).
matching = {
    'A': 'ARWMHVDN',
    'C': 'CYSMHBVN',
    'G': 'GRSKBVDN',
    'T': 'TYWKHBDN',
    'R': 'ABDGHKMNSRWV',
    'Y': 'CBDHKMNSTWVY',
    'W': 'ABDHKMNRTWVY',
    'S': 'CBDGHKMNSRVY',
    'M': 'ACBDHMNSRWVY',
    'K': 'BDGHKNSRTWVY',
    'H': 'ACBDHKMNSRTWVY',
    'B': 'CBDGHKMNSRTWVY',
    'V': 'ACBDGHKMNSRWVY',
    'D': 'ABDGHKMNSRTWVY',
    'N': 'ACBDGHKMNSRTWVY',
}

# DNA is simply another name for Bio.Seq.Seq in this namespace.
DNA = Seq
class FormattedSeq(object):
    """FormattedSeq(seq, [linear=True])-> new FormattedSeq.

    Translate a Bio.Seq into a formatted sequence to be used with Restriction.

    Roughly: remove anything which is not IUPAC alphabet and then add a space
    in front of the sequence to get a biological index instead of a
    python index (i.e. index of the first base is 1 not 0).

    Retains information about the shape of the molecule, linear (default) or
    circular. Restriction sites are searched over the edges of a circular
    sequence.
    """

    def __init__(self, seq, linear=True):
        """FormattedSeq(seq, [linear=True])-> new FormattedSeq.

        seq is either a Bio.Seq, Bio.MutableSeq or a FormattedSeq.
        If seq is a FormattedSeq, its attributes are copied and linear has
        no effect on the shape of the sequence.

        Raises TypeError for any other kind of argument.
        """
        if isinstance(seq, (Seq, MutableSeq)):
            stringy = str(seq)
            # Remember the case so that slices can be handed back lower-cased.
            self.lower = stringy.islower()
            # Note this adds a leading space to the sequence (!)
            self.data = _check_bases(stringy)
            self.linear = linear
            self.klass = seq.__class__
            self.alphabet = seq.alphabet
        elif isinstance(seq, FormattedSeq):
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.alphabet = seq.alphabet
            self.klass = seq.klass
        else:
            raise TypeError('expected Seq or MutableSeq, got %s' % type(seq))

    def __len__(self):
        """Return the number of bases (the leading space is not counted)."""
        return len(self.data) - 1

    def __repr__(self):
        """Return a representation showing the sequence and its shape."""
        return 'FormattedSeq(%s, linear=%s)' % (repr(self[1:]),
                                                repr(self.linear))

    def __eq__(self, other):
        """Return True if other is a FormattedSeq with the same repr."""
        return isinstance(other, FormattedSeq) and repr(self) == repr(other)

    def __ne__(self, other):
        """Return the negation of ``==``.

        Python 2 does not derive ``!=`` from ``__eq__``; without this method
        two equal FormattedSeq objects would still compare as different.
        """
        return not self == other

    def circularise(self):
        """Circularise sequence in place."""
        self.linear = False
        return

    def linearise(self):
        """Linearise sequence in place."""
        self.linear = True
        return

    def to_linear(self):
        """Make a new instance of sequence as linear."""
        new = self.__class__(self)
        new.linear = True
        return new

    def to_circular(self):
        """Make a new instance of sequence as circular."""
        new = self.__class__(self)
        new.linear = False
        return new

    def is_linear(self):
        """Return if sequence is linear (True) or circular (False)."""
        return self.linear

    def finditer(self, pattern, size):
        """Return a list of the matches of pattern in the sequence.

        The list is made of tuples (location, match.group); the bound
        ``group`` method is used by callers dealing with non palindromic
        sites.

        pattern is the regular expression pattern corresponding to the
        enzyme restriction site.
        size is the size of the restriction enzyme recognition site.
        """
        if self.is_linear():
            data = self.data
        else:
            # Append the first size - 1 bases so that sites spanning the
            # origin of a circular sequence can match.
            data = self.data + self.data[1:size]
        return [(i.start(), i.group) for i in re.finditer(pattern, data)]

    def __getitem__(self, i):
        """Return the indexed part wrapped in the original sequence class.

        The result is lower-cased when the source sequence was lower case.
        """
        if self.lower:
            return self.klass((self.data[i]).lower(), self.alphabet)
        return self.klass(self.data[i], self.alphabet)
class RestrictionType(type):
    """RestrictionType. Type from which all enzyme classes are derived.

    Implements the operator methods used on enzyme classes.
    """

    def __init__(cls, name='', bases=(), dct=None):
        """Initialize RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.

        Raises ValueError if the name contains a hyphen or if the class
        attribute ``compsite`` cannot be compiled as a regular expression.
        """
        if "-" in name:
            raise ValueError("Problem with hyphen in %s as enzyme name"
                             % repr(name))
        # 2011/11/26 - Nobody knows what calling the parent __init__ here was
        # supposed to accomplish, and all unit tests seem to pass without it,
        # so it is deliberately not called.
        try:
            compiled = re.compile(cls.compsite)
            cls.compsite = compiled
        except Exception:
            bad_pattern = repr(cls.compsite)
            raise ValueError("Problem with regular expression, re.compiled(%s)"
                             % bad_pattern)

    def __add__(cls, other):
        """Add restriction enzyme to a RestrictionBatch().

        If other is an enzyme, return a batch of the two enzymes.
        If other is already a RestrictionBatch, add the enzyme to it.
        Anything else raises TypeError.
        """
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        if isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        raise TypeError

    def __div__(cls, other):
        """Override '/' operator to use as search method.

        >>> EcoRI/Seq('GAATTC')
        [2]

        Returns RE.search(other).
        """
        result = cls.search(other)
        return result

    def __rdiv__(cls, other):
        """Override division with reversed operands to use as search method.

        >>> Seq('GAATTC')/EcoRI
        [2]

        Returns RE.search(other).
        """
        result = cls.search(other)
        return result

    def __truediv__(cls, other):
        """Override Python 3 division operator to use as search method.

        Like __div__.
        """
        result = cls.search(other)
        return result

    def __rtruediv__(cls, other):
        """As __truediv__, with reversed operands.

        Like __rdiv__.
        """
        result = cls.search(other)
        return result

    def __floordiv__(cls, other):
        """Override '//' operator to use as catalyse method.

        >>> EcoRI//Seq('GAATTC')
        (Seq('G', Alphabet()), Seq('AATTC', Alphabet()))

        Returns RE.catalyse(other).
        """
        fragments = cls.catalyse(other)
        return fragments

    def __rfloordiv__(cls, other):
        """As __floordiv__, with reversed operands.

        >>> Seq('GAATTC')//EcoRI
        (Seq('G', Alphabet()), Seq('AATTC', Alphabet()))

        Returns RE.catalyse(other).
        """
        fragments = cls.catalyse(other)
        return fragments

    def __str__(cls):
        """Return the name of the enzyme as string."""
        return cls.__name__

    def __repr__(cls):
        """Implement repr method.

        Used with eval or exec will instantiate the enzyme.
        """
        return "%s" % (cls.__name__,)

    def __len__(cls):
        """Return length of recognition site of enzyme as int."""
        return cls.size

    def __hash__(cls):
        """Hash on object identity, consistent with __eq__."""
        return id(cls)

    def __eq__(cls, other):
        """Override '==' operator.

        True if RE and other are the same enzyme.

        Specifically this checks they are the same Python object.
        """
        return cls is other

    def __ne__(cls, other):
        """Override '!=' operator.

        Isoschizomer strict (same ``charac``, i.e. same recognition site and
        same restriction) -> False
        All the others, including non-enzymes -> True

        WARNING - This is not the inverse of the __eq__ method

        >>> SacI != SstI  # true isoschizomers
        False
        >>> SacI == SstI
        False
        """
        same_cut = (isinstance(other, RestrictionType) and
                    cls.charac == other.charac)
        return not same_cut

    def __rshift__(cls, other):
        """Override '>>' operator to test for neoschizomers.

        neoschizomer : same recognition site, different restriction. -> True
        all the others, including non-enzymes : -> False

        >>> SmaI >> XmaI
        True
        """
        return bool(isinstance(other, RestrictionType) and
                    cls.site == other.site and
                    cls.charac != other.charac)

    def __mod__(cls, other):
        """Override '%' operator to test for compatible overhangs.

        True if a and b have compatible overhang. Raises TypeError when
        other is not an enzyme.

        >>> XhoI % SalI
        True
        """
        if isinstance(other, RestrictionType):
            return cls._mod1(other)
        raise TypeError(
            'expected RestrictionType, got %s instead' % type(other))

    def __ge__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '>='. a is greater or equal than b if the a site is longer
        than b site. If their sites have the same length, sort by
        alphabetical order of their names.

        >>> EcoRI.size
        6
        >>> EcoRV.size
        6
        >>> EcoRI >= EcoRV
        False
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        # Tuple comparison: site length first, then name.
        return (len(cls), cls.__name__) >= (len(other), other.__name__)

    def __gt__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '>'. Sorting order:
            1. size of the recognition site.
            2. if equal size, alphabetical order of the names.

        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) > (len(other), other.__name__)

    def __le__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '<='. Sorting order:
            1. size of the recognition site.
            2. if equal size, alphabetical order of the names.

        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) <= (len(other), other.__name__)

    def __lt__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '<'. Sorting order:
            1. size of the recognition site.
            2. if equal size, alphabetical order of the names.

        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) < (len(other), other.__name__)
class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethod.

    For internal use only. Not meant to be instantiated.
    """

    @classmethod
    def search(cls, dna, linear=True):
        """Return a list of cutting sites of the enzyme in the sequence.

        Compensate for circular sequences and so on.

        dna must be a Bio.Seq.Seq instance, a Bio.Seq.MutableSeq instance or
        an already prepared FormattedSeq (in which case linear is ignored).

        If linear is False, the restriction sites that span over the
        boundaries will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.

        The formatted sequence is stored on the class as ``cls.dna`` before
        the actual work is delegated to ``cls._search()``.
        """
        #
        #   Separating search from _search allows a (very limited)
        #   optimisation of the search when using a batch of restriction
        #   enzymes: the DNA is then formatted once by the class which
        #   implements the batch instead of once per enzyme.
        #   See RestrictionBatch.search() for example.
        #
        if isinstance(dna, FormattedSeq):
            cls.dna = dna
        else:
            cls.dna = FormattedSeq(dna, linear)
        return cls._search()

    @classmethod
    def all_suppliers(cls):
        """Print all the suppliers of restriction enzyme."""
        supply = sorted(x[0] for x in suppliers_dict.values())
        print(",\n".join(supply))
        return

    @classmethod
    def is_equischizomer(cls, other):
        """Test for real isoschizomer.

        True if other is an isoschizomer of RE, but not a neoschizomer,
        else False.

        Equischizomer: same site, same position of restriction.

        >>> SacI.is_equischizomer(SstI)
        True
        >>> SmaI.is_equischizomer(XmaI)
        False

        """
        return not cls != other

    @classmethod
    def is_neoschizomer(cls, other):
        """Test for neoschizomer.

        True if other is a neoschizomer of RE, else False.

        Neoschizomer: same site, different position of restriction.
        """
        return cls >> other

    @classmethod
    def is_isoschizomer(cls, other):
        """Test for same recognition site.

        True if other has the same recognition site, else False.

        Isoschizomer: same site.

        >>> SacI.is_isoschizomer(SstI)
        True
        >>> SmaI.is_isoschizomer(XmaI)
        True

        """
        return (not cls != other) or cls >> other

    @classmethod
    def equischizomers(cls, batch=None):
        """List equischizomers of the enzyme.

        Return a sorted list of all the equischizomers of RE, RE itself
        excluded.
        If batch is supplied (and not empty) it is used instead of the
        default AllEnzymes.

        Equischizomer: same site, same position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if not cls != x]
        # A caller-supplied batch does not have to contain the enzyme
        # itself; only drop it when present instead of failing on index().
        if cls in r:
            r.remove(cls)
        r.sort()
        return r

    @classmethod
    def neoschizomers(cls, batch=None):
        """List neoschizomers of the enzyme.

        Return a sorted list of all the neoschizomers of RE.
        If batch is supplied (and not empty) it is used instead of the
        default AllEnzymes.

        Neoschizomer: same site, different position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        r = sorted(x for x in batch if cls >> x)
        return r

    @classmethod
    def isoschizomers(cls, batch=None):
        """List all isoschizomers of the enzyme.

        Return a sorted list of all the equischizomers and neoschizomers of
        RE, RE itself excluded.
        If batch is supplied (and not empty) it is used instead of the
        default AllEnzymes.
        """
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if (cls >> x) or (not cls != x)]
        # Same guard as in equischizomers(): the batch may lack the enzyme.
        if cls in r:
            r.remove(cls)
        r.sort()
        return r

    @classmethod
    def frequency(cls):
        """Return the theoretically cutting frequency of the enzyme.

        Frequency of the site, given as 'one cut per x bases' (int).
        """
        return cls.freq
class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These enzymes are generally enzymes that have been only partially
    characterised and the way they cut the DNA is unknown, or enzymes for
    which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site.

    Their catalyse() method raises an error (see Unknown.catalyse).

    Unknown and NotDefined are also part of the base classes of these enzymes.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Return if the cutting pattern has one cut.

        Always False for this class.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """Return if the cutting pattern has two cuts.

        Always False for this class.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """Return a generator over the cutting positions (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence. As no cut position is known
        for these enzymes, the match location itself is yielded unchanged.
        """
        position = location
        yield position

    @classmethod
    def _rev_modify(cls, location):
        """Return a generator over the cutting positions (PRIVATE).

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic; the location is yielded unchanged.
        """
        position = location
        yield position

    @classmethod
    def characteristic(cls):
        """Return the enzyme's characteristics as a tuple.

        The tuple contains the attributes:
            - fst5 -> first 5' cut (current strand), None here
            - fst3 -> first 3' cut (complementary strand), None here
            - scd5 -> second 5' cut (current strand), None here
            - scd3 -> second 3' cut (complementary strand), None here
            - site -> recognition site.

        """
        return (None, None, None, None, cls.site)
class OneCut(AbstractCut):
    """Implement the methods for enzymes that cut the DNA only once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Return if the cutting pattern has one cut.

        Always True for this class.
        """
        return True

    @classmethod
    def cut_twice(cls):
        """Return if the cutting pattern has two cuts.

        Always False for this class.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """Return a generator yielding the real cutting position (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence. The single value yielded is
        ``location + cls.fst5``.

        Example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.

        """
        cut = location + cls.fst5
        yield cut

    @classmethod
    def _rev_modify(cls, location):
        """Return a generator yielding the real cutting position (PRIVATE).

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic: yields ``location - cls.fst3``.
        """
        cut = location - cls.fst3
        yield cut

    @classmethod
    def characteristic(cls):
        """Return the enzyme's characteristics as a tuple.

        The tuple contains the attributes:
            - fst5 -> first 5' cut (current strand)
            - fst3 -> first 3' cut (complementary strand)
            - scd5 -> second 5' cut (current strand), None here
            - scd3 -> second 3' cut (complementary strand), None here
            - site -> recognition site.

        """
        return (cls.fst5, cls.fst3, None, None, cls.site)
class TwoCuts(AbstractCut):
    """Implement the methods for enzymes that cut the DNA twice.

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Return if the cutting pattern has one cut.

        Always False for this class.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """Return if the cutting pattern has two cuts.

        Always True for this class.
        """
        return True

    @classmethod
    def _modify(cls, location):
        """Return a generator yielding both cutting positions (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence. Two values are yielded, in
        this order: ``location + cls.fst5`` then ``location + cls.scd5``.
        """
        first_cut = location + cls.fst5
        yield first_cut
        second_cut = location + cls.scd5
        yield second_cut

    @classmethod
    def _rev_modify(cls, location):
        """Return a generator yielding both cutting positions (PRIVATE).

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic: yields ``location - cls.fst3`` then
        ``location - cls.scd3``.
        """
        first_cut = location - cls.fst3
        yield first_cut
        second_cut = location - cls.scd3
        yield second_cut

    @classmethod
    def characteristic(cls):
        """Return the enzyme's characteristics as a tuple.

        The tuple contains the attributes:
            - fst5 -> first 5' cut (current strand)
            - fst3 -> first 3' cut (complementary strand)
            - scd5 -> second 5' cut (current strand)
            - scd3 -> second 3' cut (complementary strand)
            - site -> recognition site.

        """
        return (cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site)
class Meth_Dep(AbstractCut):
    """Implement the information about methylation.

    Enzymes of this class possess a site which is methylable.
    """

    @classmethod
    def is_methylable(cls):
        """Return if recognition site can be methylated.

        Always True for this class.
        """
        return True
class Meth_Undep(AbstractCut):
    """Implement information about methylation sensitivity.

    Enzymes of this class are not sensitive to methylation.
    """

    @classmethod
    def is_methylable(cls):
        """Return if recognition site can be methylated.

        Always False for this class.
        """
        return False
class Palindromic(AbstractCut):
    """Implement methods for enzymes with palindromic recognition sites.

    Palindromic means : the recognition site and its reverse complement are
    identical.
    Remarks : an enzyme with a site CGNNCG is palindromic even if some
    of the sites that it will recognise are not, for example CGAACG.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """Return a list of cutting sites of the enzyme in the sequence (PRIVATE).

        For internal use only.

        Implement the search method for palindromic enzymes: every match of
        ``cls.compsite`` in ``cls.dna`` is converted to cut position(s) with
        ``cls._modify``. The list is stored in ``cls.results`` and, when not
        empty, ``cls._drop()`` is called before it is returned.
        """
        cuts = []
        for start, _group in cls.dna.finditer(cls.compsite, cls.size):
            cuts.extend(cls._modify(start))
        cls.results = cuts
        if cuts:
            cls._drop()
        # Read the attribute again: _drop() works on cls.results.
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """Return if the enzyme has a palindromic recognition site."""
        return True
class NonPalindromic(AbstractCut):
    """Implement methods for enzymes with non-palindromic recognition sites.

    Palindromic means : the recognition site and its reverse complement are
    identical.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """Return a list of cutting sites of the enzyme in the sequence (PRIVATE).

        For internal use only.

        Implement the search method for non palindromic enzymes. The
        compiled pattern is split on '|' into two alternatives, which are
        searched separately in ``cls.dna``. Matches of the first are kept
        when the group named ``str(cls)`` matched and are converted with
        ``_modify``; matches of the second are kept when the group named
        ``str(cls) + '_as'`` matched and are converted with ``_rev_modify``.

        The sorted positions are stored in ``cls.results``; when the list is
        not empty ``cls._drop()`` is called before it is returned.
        """
        forward_pattern, reverse_pattern = cls.compsite.pattern.split('|')
        forward_hits = cls.dna.finditer(forward_pattern, cls.size)
        reverse_hits = cls.dna.finditer(reverse_pattern, cls.size)
        cls.results = []
        to_cut = cls._modify
        to_rev_cut = cls._rev_modify
        group_name = str(cls)
        cls.on_minus = []

        for position, group in forward_hits:
            if group(group_name):
                cls.results.extend(to_cut(position))
        # The second alternative uses the '_as' suffixed group name.
        group_name += '_as'
        for position, group in reverse_hits:
            if group(group_name):
                cls.results.extend(to_rev_cut(position))

        if cls.results:
            cls.results.sort()
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """Return if the enzyme has a palindromic recognition site."""
        return False
class Unknown(AbstractCut):
    """Implement methods for enzymes that produce unknown overhangs.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """Refuse to cut: the restriction pattern is unknown.

        RE.catalyse(dna, linear=True) always raises NotImplementedError for
        these enzymes, whatever dna and linear are.

        The signature matches the catalyse() method of the other overhang
        classes, where dna must be a Bio.Seq.Seq instance or a
        Bio.Seq.MutableSeq instance and linear tells if the sequence is
        linear or circular.
        """
        message = '%s restriction is unknown.' % cls.__name__
        raise NotImplementedError(message)

    # American spelling alias.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always False for this class.

        Related methods:
         - RE.is_3overhang()
         - RE.is_5overhang()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        Always False for this class.

        Related methods:
         - RE.is_3overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always False for this class.

        Related methods:
         - RE.is_5overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        For this class it is always 'unknown'.
        """
        return 'unknown'

    @classmethod
    def compatible_end(cls):
        """List all enzymes that produce compatible ends for the enzyme.

        Always an empty list for this class.
        """
        return []

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Always False for this class.
        """
        return False
class Blunt(AbstractCut):
    """Implement methods for enzymes that produce blunt ends.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        sites = cls.search(dna, linear)
        # cls.search() stores the formatted sequence on the class; the slices
        # below start at 1, as in the uncut case.
        seq = cls.dna
        if not sites:
            # No site: the whole sequence is the only fragment.
            return seq[1:],
        # Fragments lying between two consecutive cut positions.
        inner = [seq[sites[i]:sites[i + 1]] for i in range(len(sites) - 1)]
        if seq.is_linear():
            # START -> first site, inner fragments, last site -> END.
            fragments = [seq[1:sites[0]]] + inner + [seq[sites[-1]:]]
        else:
            # Circular: bridge the LAST site to the FIRST site.
            fragments = [seq[sites[-1]:] + seq[1:sites[0]]] + inner
        return tuple(fragments)

    # American spelling alias.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        True if the enzyme produces blunt end.

        Related methods:
         - RE.is_3overhang()
         - RE.is_5overhang()
         - RE.is_unknown()

        """
        return True

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        True if the enzyme produces 5' overhang sticky end.

        Related methods:
         - RE.is_3overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        True if the enzyme produces 3' overhang sticky end.

        Related methods:
         - RE.is_5overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        """
        return 'blunt'

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        Only the enzymes of batch are considered; when batch is None or
        empty, AllEnzymes is searched instead. The result is a sorted list
        of the blunt cutters found.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate over the requested batch, not unconditionally AllEnzymes.
        return sorted(x for x in iter(batch) if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only

        Test for the compatibility of restriction ending of RE and other.
        Any two blunt cutters are compatible.
        """
        return issubclass(other, Blunt)
1188
class Ov5(AbstractCut):
    """Implement methods for enzymes that produce 5' overhanging ends.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        sites = cls.search(dna, linear)
        # cls.search() stores the formatted sequence on the class; the slices
        # below start at 1, as in the uncut case.
        seq = cls.dna
        if not sites:
            # No site: the whole sequence is the only fragment.
            return seq[1:],
        # Fragments lying between two consecutive cut positions.
        inner = [seq[sites[i]:sites[i + 1]] for i in range(len(sites) - 1)]
        if seq.is_linear():
            # START -> first site, inner fragments, last site -> END.
            fragments = [seq[1:sites[0]]] + inner + [seq[sites[-1]:]]
        else:
            # Circular: bridge the LAST site to the FIRST site.
            fragments = [seq[sites[-1]:] + seq[1:sites[0]]] + inner
        return tuple(fragments)

    # American spelling alias.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        True if the enzyme produces blunt end.

        Related methods:
         - RE.is_3overhang()
         - RE.is_5overhang()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        True if the enzyme produces 5' overhang sticky end.

        Related methods:
         - RE.is_3overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return True

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        True if the enzyme produces 3' overhang sticky end.

        Related methods:
         - RE.is_5overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        """
        return "5' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        Only the enzymes of batch are considered; when batch is None or
        empty, AllEnzymes is searched instead. The result is a sorted list
        of the 5' overhang cutters x for which ``x % cls`` is true.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate over the requested batch, not unconditionally AllEnzymes.
        return sorted(x for x in iter(batch)
                      if x.is_5overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Only another 5' overhang cutter can be compatible; the overhang
        comparison itself is delegated to cls._mod2().
        """
        if issubclass(other, Ov5):
            return cls._mod2(other)
        return False
1320
class Ov3(AbstractCut):
    """Implement methods for enzymes that produce 3' overhanging ends.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        sites = cls.search(dna, linear)
        # cls.search() stores the formatted sequence on the class; the slices
        # below start at 1, as in the uncut case.
        seq = cls.dna
        if not sites:
            # No site: the whole sequence is the only fragment.
            return seq[1:],
        # Fragments lying between two consecutive cut positions.
        inner = [seq[sites[i]:sites[i + 1]] for i in range(len(sites) - 1)]
        if seq.is_linear():
            # START -> first site, inner fragments, last site -> END.
            fragments = [seq[1:sites[0]]] + inner + [seq[sites[-1]:]]
        else:
            # Circular: bridge the LAST site to the FIRST site.
            fragments = [seq[sites[-1]:] + seq[1:sites[0]]] + inner
        return tuple(fragments)

    # American spelling alias.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        True if the enzyme produces blunt end.

        Related methods:
         - RE.is_3overhang()
         - RE.is_5overhang()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        True if the enzyme produces 5' overhang sticky end.

        Related methods:
         - RE.is_3overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        True if the enzyme produces 3' overhang sticky end.

        Related methods:
         - RE.is_5overhang()
         - RE.is_blunt()
         - RE.is_unknown()

        """
        return True

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        """
        return "3' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        Only the enzymes of batch are considered; when batch is None or
        empty, AllEnzymes is searched instead. The result is a sorted list
        of the 3' overhang cutters x for which ``x % cls`` is true.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate over the requested batch, not unconditionally AllEnzymes.
        return sorted(x for x in iter(batch)
                      if x.is_3overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Only another 3' overhang cutter can be compatible; the overhang
        comparison itself is delegated to cls._mod2().
        """
        #
        # called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if issubclass(other, Ov3):
            return cls._mod2(other)
        return False
1455
class Defined(AbstractCut):
    """Implement methods for enzymes with defined recognition site and cut.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes:
        Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Drop the site that are situated outside the sequence in linear
        sequence. Modify the index for site in circular sequences.
        """
        #
        # The cut positions are computed by adding the site-to-cut distance
        # to the position of the site (_modify and _rev_modify), so some of
        # them may fall outside the sequence.
        # Linear sequence: such positions are removed altogether.
        # Circular sequence: the site is in the sequence, so the position is
        # wrapped around instead of being dropped.
        #
        seq_len = len(cls.dna)
        if cls.dna.is_linear():
            after_start = itertools.dropwhile(lambda pos: pos <= 1,
                                              cls.results)
            cls.results = list(itertools.takewhile(
                lambda pos: pos <= seq_len, after_start))
            return
        positions = cls.results
        # Leading positions before the start of the sequence.
        for idx, pos in enumerate(positions):
            if pos >= 1:
                break
            positions[idx] += seq_len
        # Trailing positions past the end, walked from the last one backward
        # over a reversed copy of the list.
        for back, pos in enumerate(positions[::-1], 1):
            if pos <= seq_len:
                break
            positions[-back] -= seq_len
        return

    @classmethod
    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        Related methods:
         - RE.is_ambiguous()
         - RE.is_unknown()

        """
        return True

    @classmethod
    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        Related methods:
         - RE.is_defined()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        Related methods:
         - RE.is_defined()
         - RE.is_ambiguous()

        """
        return False

    @classmethod
    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        """
        cut5 = cls.fst5
        cut3 = cls.fst3
        site = cls.site
        if cls.cut_twice():
            return 'cut twice, not yet implemented sorry.'
        if cls.is_5overhang():
            if cut5 == cut3 == 0:
                return 'N^' + cls.site + '_N'
            if cut3 == 0:
                return site[:cut5] + '^' + site[cut5:] + '_N'
            return (site[:cut5] + '^' + site[cut5:cut3] + '_' +
                    site[cut3:])
        if cls.is_blunt():
            return site[:cut5] + '^_' + site[cut5:]
        # Remaining case: 3' overhang.
        if cut5 == cut3 == 0:
            return 'N_' + site + '^N'
        return site[:cut3] + '_' + site[cut3:cut5] + '^' + site[cut5:]

    @classmethod
    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        """
        #
        # called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if other.ovhgseq == cls.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            # Let the ambiguous enzyme do the degenerate comparison.
            return other._mod2(cls)
        return False
1606
class Ambiguous(AbstractCut):
    """Implement methods for enzymes that produce variable overhangs.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.

    Notes:
        Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated.

    """

    @classmethod
    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Drop the site that are situated outside the sequence in linear
        sequence. Modify the index for site in circular sequences.
        """
        length = len(cls.dna)
        drop = itertools.dropwhile
        take = itertools.takewhile
        if cls.dna.is_linear():
            cls.results = [x for x in drop(lambda x: x <= 1, cls.results)]
            cls.results = [x for x in take(lambda x: x <= length,
                                           cls.results)]
        else:
            # Wrap leading positions located before the start ...
            for index, location in enumerate(cls.results):
                if location < 1:
                    cls.results[index] += length
                else:
                    break
            # ... and trailing positions located past the end.
            for index, location in enumerate(cls.results[::-1]):
                if location > length:
                    cls.results[-(index + 1)] -= length
                else:
                    break
        return

    @classmethod
    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        Related methods:
         - RE.is_ambiguous()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        Related methods:
         - RE.is_defined()
         - RE.is_unknown()

        """
        return True

    @classmethod
    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        Related methods:
         - RE.is_defined()
         - RE.is_ambiguous()

        """
        return False

    @classmethod
    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.

        The overhang of RE is turned into a regular expression, one element
        per base, and matched against the overhang of other. Overhangs of
        different lengths are never compatible.
        """
        #
        # called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        own = cls.ovhgseq
        theirs = other.ovhgseq
        if len(own) != len(theirs):
            return False
        # Translate every base exactly once. Rewriting the whole string with
        # split/join for each base would also rewrite letters inserted by an
        # earlier expansion.
        # NOTE(review): matching[base] is presumably the string of codes
        # compatible with base; confirm against its definition.
        pieces = []
        for base in own:
            if base == 'N':
                pieces.append('.')
            elif base in 'RYWMSKHDBV':
                pieces.append('[' + matching[base] + ']')
            else:
                pieces.append(base)
        # Every element consumes one character and both overhangs have the
        # same length, so a match at the start covers the whole overhang.
        return re.match(''.join(pieces), theirs) is not None

    @classmethod
    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        Raise a ValueError for a blunt cut located inside the site.
        """
        f5 = cls.fst5
        f3 = cls.fst3
        length = len(cls)
        site = cls.site
        # 'layout' rather than 're', so the re module is not shadowed.
        if cls.cut_twice():
            layout = 'cut twice, not yet implemented sorry.'
        elif cls.is_5overhang():
            if f3 == f5 == 0:
                layout = 'N^' + site + '_N'
            elif 0 <= f5 <= length and 0 <= f3 + length <= length:
                layout = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
            elif 0 <= f5 <= length:
                layout = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
            elif 0 <= f3 + length <= length:
                layout = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
            elif f3 + length < 0:
                # 'N^' + ..., not 'N^' * ...: str * int * str is a TypeError.
                layout = ('N^' + abs(f5) * 'N' + '_' +
                          abs(length + f3) * 'N' + site)
            elif f5 > length:
                layout = (site + (f5 - length) * 'N' + '^' +
                          (length + f3 - f5) * 'N' + '_N')
            else:
                layout = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
        elif cls.is_blunt():
            if f5 < 0:
                layout = 'N^_' + abs(f5) * 'N' + site
            elif f5 > length:
                layout = site + (f5 - length) * 'N' + '^_N'
            else:
                raise ValueError('%s.easyrepr() : error f5=%i'
                                 % (cls.name, f5))
        else:
            if f3 == 0:
                if f5 == 0:
                    layout = 'N_' + site + '^N'
                else:
                    layout = site + '_' + (f5 - length) * 'N' + '^N'
            elif 0 < f3 + length <= length and 0 <= f5 <= length:
                layout = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
            elif 0 < f3 + length <= length:
                layout = (site[:f3] + '_' + site[f3:] +
                          (f5 - length) * 'N' + '^N')
            elif 0 <= f5 <= length:
                layout = ('N_' + 'N' * (f3 + length) + site[:f5] + '^' +
                          site[f5:])
            elif f3 > 0:
                layout = (site + f3 * 'N' + '_' +
                          (f5 - f3 - length) * 'N' + '^N')
            elif f5 < 0:
                layout = ('N_' + abs(f3 - f5 + length) * 'N' + '^' +
                          abs(f5) * 'N' + site)
            else:
                layout = ('N_' + abs(f3 + length) * 'N' + site +
                          (f5 - length) * 'N' + '^N')
        return layout
1792
class NotDefined(AbstractCut):
    """Implement methods for enzymes with non-characterized overhangs.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Nothing is dropped for a linear sequence. For a circular sequence
        the positions located before the start or past the end are wrapped
        around.
        """
        if cls.dna.is_linear():
            return
        length = len(cls.dna)
        # Leading positions before the start of the sequence.
        for index, location in enumerate(cls.results):
            if location < 1:
                cls.results[index] += length
            else:
                break
        # Trailing positions past the end. The copy must be REVERSED
        # ([::-1], as in Defined._drop and Ambiguous._drop) so that the
        # position being tested is the one modified at -(index + 1).
        for index, location in enumerate(cls.results[::-1]):
            if location > length:
                cls.results[-(index + 1)] -= length
            else:
                break
        return

    @classmethod
    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        Related methods:
         - RE.is_ambiguous()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        Related methods:
         - RE.is_defined()
         - RE.is_unknown()

        """
        return False

    @classmethod
    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        Related methods:
         - RE.is_defined()
         - RE.is_ambiguous()

        """
        return True

    @classmethod
    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Always raise a ValueError: the overhang is not defined.
        """
        #
        # Normally we should not arrive here. But well better safe than
        # sorry.
        # the overhang is not defined we are compatible with nobody.
        # could raise an Error may be rather than return quietly.
        #
        # return False
        raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
                         % (str(cls), str(other), str(cls)))

    @classmethod
    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        """
        return '? %s ?' % cls.site
1909
class Commercially_available(AbstractCut):
    """Implement methods for enzymes which are commercially available.

    Internal use only. Not meant to be instantiated.
    """

    #
    # Recent addition to Rebase make this naming convention uncertain.
    # May be better to says enzymes which have a supplier.
    #

    @classmethod
    def suppliers(cls):
        """Print the name of each supplier of the enzyme, one per line."""
        for code in cls.suppl:
            supplier_name = suppliers_dict[code][0]
            print(supplier_name + ',')
        return

    @classmethod
    def supplier_list(cls):
        """Return the names of the suppliers of the enzyme as a list."""
        names = []
        for code, entry in suppliers_dict.items():
            if code in cls.suppl:
                names.append(entry[0])
        return names

    @classmethod
    def buffers(cls, supplier):
        """Return the recommended buffer of the supplier for this enzyme.

        Not implemented yet: always return None.
        """
        return

    @classmethod
    def is_comm(cls):
        """Return if enzyme is commercially available.

        True if RE has suppliers.
        """
        return True
1949
class Not_available(AbstractCut):
    """Implement methods for enzymes which are not commercially available.

    Internal use only. Not meant to be instantiated.
    """

    @staticmethod
    def suppliers():
        """Print a list of suppliers of the enzyme.

        There is no supplier: nothing is printed and None is returned.
        """
        return None

    @classmethod
    def supplier_list(cls):
        """Return a list of suppliers of the enzyme (always empty)."""
        return []

    @classmethod
    def buffers(cls, supplier):
        """Return the recommended buffer of the supplier for this enzyme.

        Not implemented yet. Always raise a TypeError since the enzyme has
        no supplier.
        """
        raise TypeError("Enzyme not commercially available.")

    @classmethod
    def is_comm(cls):
        """Return if enzyme is commercially available.

        True if RE has suppliers.
        """
        return False
1982
###############################################################################
#                                                                             #
#                       Restriction Batch                                     #
#                                                                             #
###############################################################################


class RestrictionBatch(set):
    """Class for operations on more than one enzyme."""

    def __init__(self, first=(), suppliers=()):
        """Initialize empty RB or pre-fill with enzymes (from supplier).

        first is an iterable of enzymes (or enzyme names), suppliers an
        iterable of supplier codes, i.e. keys of suppliers_dict.
        """
        first = [self.format(x) for x in first]
        # The supplier entries hold enzyme names: evaluate them to enzymes.
        first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
        set.__init__(self, first)
        self.mapping = dict.fromkeys(self)
        # (sequence string, linear) of the last search, used as cache key.
        self.already_mapped = None
        self.suppliers = [x for x in suppliers if x in suppliers_dict]

    def __str__(self):
        """Return the sorted names joined by '+', shortened from 5 names."""
        names = self.elements()
        if len(names) < 5:
            return '+'.join(names)
        return '...'.join(('+'.join(names[:2]), '+'.join(names[-2:])))

    def __repr__(self):
        """Return 'RestrictionBatch([...])' with the sorted enzyme names."""
        return 'RestrictionBatch(%s)' % self.elements()

    def __contains__(self, other):
        """Return True if other (enzyme or enzyme name) is in the batch."""
        try:
            other = self.format(other)
        except ValueError:  # other is not a restriction enzyme
            return False
        return set.__contains__(self, other)

    def __div__(self, other):
        """Override '/' operator to use as search method."""
        return self.search(other)

    def __rdiv__(self, other):
        """Override division with reversed operands to use as search method."""
        return self.search(other)

    def __truediv__(self, other):
        """Override Python 3 division operator to use as search method.

        Like __div__.
        """
        return self.search(other)

    def __rtruediv__(self, other):
        """As __truediv___, with reversed operands.

        Like __rdiv__.
        """
        return self.search(other)

    def get(self, enzyme, add=False):
        """Check if enzyme is in batch and return it.

        If add is True and enzyme is not in batch add enzyme to batch.
        If add is False (which is the default) only return enzyme.
        If enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        """
        e = self.format(enzyme)
        if e in self:
            return e
        elif add:
            self.add(e)
            return e
        else:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % e.__name__)

    def lambdasplit(self, func):
        """Filter enzymes in batch with supplied function.

        The new batch will contain only the enzymes for which
        func return True.
        """
        new = RestrictionBatch()
        # Fill the set itself: a '_data' attribute means nothing to the
        # built-in set and would leave the new batch empty.
        set.update(new, filter(func, self))
        return new

    def add_supplier(self, letter):
        """Add all enzymes from a given supplier to batch.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        Returns None.
        Raise a KeyError if letter is not a supplier code.
        """
        supplier = suppliers_dict[letter]
        # Record each supplier once, so current_suppliers() has no duplicate.
        if letter not in self.suppliers:
            self.suppliers.append(letter)
        for x in supplier[1]:
            self.add_nocheck(eval(x))
        return

    def current_suppliers(self):
        """List the current suppliers for the restriction batch.

        Return a sorted list of the suppliers which have been used to
        create the batch.
        """
        suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers)
        return suppl_list

    def __iadd__(self, other):
        """Override '+=' for use with sets.

        b += other -> add other to b, check the type of other.
        """
        self.add(other)
        return self

    def __add__(self, other):
        """Overide '+' for use with sets.

        b + other -> new RestrictionBatch.
        """
        new = self.__class__(self)
        new.add(other)
        return new

    def remove(self, other):
        """Remove enzyme from restriction batch.

        Safe set.remove method. Verify that other is a RestrictionType or can
        be evaluated to a RestrictionType.
        Raise a ValueError if other can not be evaluated to a RestrictionType.
        Raise a KeyError if other is not in B.
        """
        result = set.remove(self, self.format(other))
        # The batch changed: the cached search result is out of date.
        self.already_mapped = None
        return result

    def add(self, other):
        """Add a restriction enzyme to the restriction batch.

        Safe set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        Raise a ValueError if other can not be evaluated to a RestrictionType.
        """
        result = set.add(self, self.format(other))
        # The batch changed: the cached search result is out of date.
        self.already_mapped = None
        return result

    def add_nocheck(self, other):
        """Add restriction enzyme to batch without checking its type."""
        result = set.add(self, other)
        # The batch changed: the cached search result is out of date.
        self.already_mapped = None
        return result

    def format(self, y):
        """Evaluate enzyme (name) and return it (as RestrictionType).

        If y is a RestrictionType return y.
        If y can be evaluated to a RestrictionType return eval(y).
        Raise a ValueError in all other case.
        """
        # NOTE(review): eval() executes arbitrary expressions; never pass
        # untrusted text to this method.
        try:
            if isinstance(y, RestrictionType):
                return y
            # Evaluate once, always through str(y), and reuse the result.
            candidate = eval(str(y))
            if isinstance(candidate, RestrictionType):
                return candidate
        except (NameError, SyntaxError):
            pass
        raise ValueError('%s is not a RestrictionType' % y.__class__)

    def is_restriction(self, y):
        """Return if enzyme (name) is a known enzyme.

        True if y or eval(y) is a RestrictionType, False otherwise, including
        when y is a name that can not be evaluated.
        """
        try:
            self.format(y)
        except ValueError:
            return False
        return True

    def split(self, *classes, **bool):
        """Extract enzymes of a certain class and put in new RestrictionBatch.

        B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

        An enzyme is kept when, for every class given, it is a subclass of
        that class (flag True, the default) or is not (flag False).

        It works but it is slow, so it has really an interest when splitting
        over multiple conditions.
        """
        # Local alias: the keyword dictionary is named after a builtin.
        wanted = bool

        def splittest(element):
            for klass in classes:
                required = wanted.get(klass.__name__, True)
                if issubclass(element, klass):
                    if not required:
                        return False
                elif required:
                    return False
            return True

        new = RestrictionBatch()
        # Fill the set itself: a '_data' attribute means nothing to the
        # built-in set and would leave the new batch empty.
        set.update(new, filter(splittest, self))
        return new

    def elements(self):
        """List the enzymes of the RestrictionBatch as list of strings.

        Give all the names of the enzymes in B sorted alphabetically.
        """
        return sorted(str(e) for e in self)

    def as_string(self):
        """List the names of the enzymes of the RestrictionBatch.

        Return a list of the name of the elements of the batch.
        """
        return [str(e) for e in self]

    @classmethod
    def suppl_codes(cls):
        """Return a dictionary with supplier codes.

        Letter code for the suppliers.
        """
        supply = dict((k, v[0]) for k, v in suppliers_dict.items())
        return supply

    @classmethod
    def show_codes(cls):
        """Print a list of supplier codes."""
        supply = [' = '.join(i) for i in cls.suppl_codes().items()]
        print('\n'.join(supply))
        return

    def search(self, dna, linear=True):
        """Return a dic of cutting sites in the seq for the batch enzymes.

        dna is a DNA or a FormattedSeq instance; for a FormattedSeq its own
        linear attribute is used instead of the linear argument.
        Raise a TypeError for any other type.
        """
        #
        # here we replace the search method of the individual enzymes
        # with one unique testing method.
        #
        if not hasattr(self, "already_mapped"):
            # TODO - Why does this happen!
            # Try the "doctest" at the start of PrintFormat.py
            self.already_mapped = None
        if isinstance(dna, DNA):
            # For the searching, we just care about the sequence as a string,
            # if that is the same we can use the cached search results.
            # At the time of writing, Seq == method isn't implemented,
            # and therefore does object identity which is stricter.
            key = (str(dna), linear)
            if key == self.already_mapped:
                return self.mapping
            self.already_mapped = key
            fseq = FormattedSeq(dna, linear)
            self.mapping = dict((x, x.search(fseq)) for x in self)
            return self.mapping
        elif isinstance(dna, FormattedSeq):
            key = (str(dna), dna.linear)
            if key == self.already_mapped:
                return self.mapping
            self.already_mapped = key
            self.mapping = dict((x, x.search(dna)) for x in self)
            return self.mapping
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
2248
###############################################################################
#                                                                             #
#                           Restriction Analysis                              #
#                                                                             #
###############################################################################


class Analysis(RestrictionBatch, PrintFormat):
    """Provide methods for enhanced analysis and pretty printing."""

    def __init__(self, restrictionbatch=None, sequence=None, linear=True):
        """Initialize an Analysis with RestrictionBatch and sequence.

        Analysis([restrictionbatch [, sequence] linear=True])
        -> New Analysis class.

        When restrictionbatch or sequence is omitted, a new empty
        RestrictionBatch or an empty DNA sequence is created for this
        instance (nothing is shared between instances).

        For most of the methods of this class if a dictionary is given it will
        be used as the base to calculate the results.
        If no dictionary is given a new analysis using the RestrictionBatch
        which has been given when the Analysis class has been instantiated,
        will be carried out and used.
        """
        if restrictionbatch is None:
            restrictionbatch = RestrictionBatch()
        if sequence is None:
            sequence = DNA('')
        RestrictionBatch.__init__(self, restrictionbatch)
        self.rb = restrictionbatch
        self.sequence = sequence
        self.linear = linear
        # Only search when there is something to search in.
        if self.sequence:
            self.search(self.sequence, self.linear)

    def __repr__(self):
        """Represent the analysis by its batch, sequence and linearity."""
        return 'Analysis(%s,%s,%s)' %\
            (repr(self.rb), repr(self.sequence), self.linear)

    def _sub_set(self, wanted):
        """Filter result for keys which are in wanted.

        A._sub_set(other_set) -> dict.

        Internal use only.

        Screen the results through wanted set.
        Keep only the results for which the enzymes is in wanted set.
        """
        # It seems that this method is not used in the whole class!
        return dict((k, v) for k, v in self.mapping.items() if k in wanted)

    def _boundaries(self, start, end):
        """Set boundaries to correct values.

        Format the boundaries for use with the methods that limit the
        search to only part of the sequence given to analyse.

        Return a tuple (start, end, test) where test(start, end, site)
        tells whether site lies inside the region.  Raise a TypeError if
        start or end is not an int.
        """
        if not isinstance(start, int):
            raise TypeError('expected int, got %s instead' % type(start))
        if not isinstance(end, int):
            raise TypeError('expected int, got %s instead' % type(end))
        if start < 1:  # Looks like this tries to do python list like indexing
            start += len(self.sequence)
        if end < 1:
            end += len(self.sequence)
        if start > end:
            start, end = end, start
        if start < end:
            return start, end, self._test_normal
        # start == end: always return a triple, otherwise the callers fail
        # when unpacking the result.
        return start, end, self._test_reverse

    def _test_normal(self, start, end, site):
        """Test if site is between start and end.

        Internal use only
        """
        return start <= site < end

    def _test_reverse(self, start, end, site):
        """Test if site is between end and start (for circular sequences).

        Internal use only.
        """
        return start <= site <= len(self.sequence) or 1 <= site < end

    def format_output(self, dct=None, title='', s1=''):
        """Collect data and pass to PrintFormat.

        A.format_output([dct[, title[, s1]]]) -> dct.

        If dct is not given the full dictionary is used.
        """
        if not dct:
            dct = self.mapping
        return PrintFormat.format_output(self, dct, title, s1)

    def print_that(self, dct=None, title='', s1=''):
        """Print the output of the analysis.

        A.print_that([dct[, title[, s1]]]) -> print the results from dct.

        If dct is not given the full dictionary is used.
        s1: Title for non-cutting enzymes
        This method prints the output of A.format_output() and it is here
        for backwards compatibility.
        """
        print(self.format_output(dct, title, s1))

    def change(self, **what):
        """Change parameters of print output.

        `A.change(**attribute_name)` -> Change attribute of Analysis.

        It is possible to change the width of the shell by setting
        self.ConsoleWidth to what you want.
        self.NameWidth refer to the maximal length of the enzyme name.

        Changing one of these parameters here might not give the results
        you expect. In which case, you can settle back to a 80 columns shell
        or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
        you get it right.

        Changing 'sequence', 'rb' or 'linear' triggers a new search.
        Raise an AttributeError for 'Cmodulo', 'PrefWidth' and for any
        unknown attribute name.
        """
        for k, v in what.items():
            if k in ('NameWidth', 'ConsoleWidth'):
                setattr(self, k, v)
                self.Cmodulo = self.ConsoleWidth % self.NameWidth
                self.PrefWidth = self.ConsoleWidth - self.Cmodulo
            elif k == 'sequence':
                self.sequence = v
                self.search(self.sequence, self.linear)
            elif k == 'rb':
                # Re-initialise in place.  The result of __init__ (None) must
                # not be bound to self, or the remaining keywords would be
                # applied to None.
                Analysis.__init__(self, v, self.sequence, self.linear)
            elif k == 'linear':
                self.linear = v
                self.search(self.sequence, v)
            elif k in ('Indent', 'Maxsize'):
                setattr(self, k, v)
            elif k in ('Cmodulo', 'PrefWidth'):
                raise AttributeError(
                    'To change %s, change NameWidth and/or ConsoleWidth' % k)
            else:
                raise AttributeError('Analysis has no attribute %s' % k)
        return

    def full(self, linear=True):
        """Perform analysis with all enzymes of batch and return all results.

        A.full() -> dict.

        Full Restriction Map of the sequence.
        The linear argument is accepted but not used: the stored mapping is
        returned as it is.
        """
        return self.mapping

    def blunt(self, dct=None):
        """Return only cuts that have blunt ends."""
        if not dct:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if k.is_blunt())

    def overhang5(self, dct=None):
        """Return only cuts that have 5' overhangs."""
        if not dct:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if k.is_5overhang())

    def overhang3(self, dct=None):
        """Return only cuts that have 3' overhangs."""
        if not dct:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if k.is_3overhang())

    def defined(self, dct=None):
        """Return only results from enzymes that produce defined overhangs."""
        if not dct:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if k.is_defined())

    def with_sites(self, dct=None):
        """Return only results from enzyme with at least one cut."""
        if not dct:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if v)

    def without_site(self, dct=None):
        """Return only results from enzymes that don't cut the sequence."""
        if not dct:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if not v)

    def with_N_sites(self, N, dct=None):
        """Return only results from enzymes that cut the sequence N times."""
        if not dct:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if len(v) == N)

    def with_number_list(self, list, dct=None):
        """Return only results from enzymes that cut (x,y,z,...) times."""
        if not dct:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if len(v) in list)

    def with_name(self, names, dct=None):
        """Return only results from enzymes which names are listed.

        A warning is issued for every name which is not in AllEnzymes and
        that name is ignored.  The names argument itself is left untouched.
        """
        # Build a filtered copy: deleting from names while looping over it
        # skipped the entry following each unknown name.
        known = []
        for enzyme in names:
            if enzyme in AllEnzymes:
                known.append(enzyme)
            else:
                warnings.warn("no data for the enzyme: %s" % enzyme,
                              BiopythonWarning)
        if not dct:
            return RestrictionBatch(known).search(self.sequence, self.linear)
        return dict((n, dct[n]) for n in known if n in dct)

    def with_site_size(self, site_size, dct=None):
        """Return only results from enzymes with a given site size."""
        sites = [name for name in self if name.size == site_size]
        if not dct:
            # Use the linearity of the analysis, as with_name() does.
            return RestrictionBatch(sites).search(self.sequence, self.linear)
        # Membership is tested against the selected enzymes; site_size is a
        # number and cannot be used as a container.
        return dict((k, v) for k, v in dct.items() if k in sites)

    def only_between(self, start, end, dct=None):
        """Return only results from enzymes that only cut within start, end."""
        start, end, test = self._boundaries(start, end)
        if not dct:
            dct = self.mapping
        d = {}
        for key, sites in dct.items():
            # Keep enzymes which cut, and whose cuts are all inside.
            if sites and all(test(start, end, site) for site in sites):
                d[key] = sites
        return d

    def between(self, start, end, dct=None):
        """Return only results from enzymes that cut at least within borders.

        Enzymes that cut the sequence at least in between start and end.
        They may cut outside as well.
        """
        start, end, test = self._boundaries(start, end)
        if not dct:
            dct = self.mapping
        d = {}
        for key, sites in dct.items():
            if any(test(start, end, site) for site in sites):
                d[key] = sites
        return d

    def show_only_between(self, start, end, dct=None):
        """Return only results from within start, end.

        Enzymes must cut inside start/end and may also cut outside. However,
        only the cutting positions within start/end will be returned.
        """
        # NOTE(review): the positions are filtered with the start and end
        # values as given (both ends included), whereas between() first
        # normalises them with _boundaries() - confirm this is intended.
        if start <= end:
            def inside(position):
                return start <= position <= end
        else:
            def inside(position):
                return start <= position or position <= end
        return dict((k, [vv for vv in v if inside(vv)])
                    for k, v in self.between(start, end, dct).items())

    def only_outside(self, start, end, dct=None):
        """Return only results from enzymes that only cut outside start, end.

        Enzymes that cut the sequence outside of the region
        in between start and end but do not cut inside.
        """
        start, end, test = self._boundaries(start, end)
        if not dct:
            dct = self.mapping
        d = {}
        for key, sites in dct.items():
            # Keep enzymes which cut, but never inside the region.
            if sites and not any(test(start, end, site) for site in sites):
                d[key] = sites
        return d

    def outside(self, start, end, dct=None):
        """Return only results from enzymes that at least cut outside borders.

        Enzymes that cut outside the region in between start and end.
        They may cut inside as well.
        """
        start, end, test = self._boundaries(start, end)
        if not dct:
            dct = self.mapping
        d = {}
        for key, sites in dct.items():
            if any(not test(start, end, site) for site in sites):
                d[key] = sites
        return d

    def do_not_cut(self, start, end, dct=None):
        """Return only results from enzymes that don't cut between borders.

        The result holds the enzymes of dct which do not cut at all and
        those which cut only outside of the region.
        """
        if not dct:
            dct = self.mapping
        # Take the non-cutters from the same dictionary as the others.
        d = self.without_site(dct)
        d.update(self.only_outside(start, end, dct))
        return d
#
# The restriction enzyme classes are created dynamically when the module is
# imported. Here is the magic which allows the creation of the
# restriction-enzyme classes.
#
# The reason for the two dictionaries in Restriction_Dictionary,
# one for the types (which will be called pseudo-types as they really
# correspond to the values that instances of RestrictionType can take)
# and one for the enzymes, is efficiency, as the bases are evaluated
# once per pseudo-type.
#
# However Restriction is still a very inefficient module at import. But
# remember that around 660 classes (which is more or less the size of Rebase)
# have to be created dynamically. However, this processing takes place only
# once.
# This inefficiency is however largely compensated by the use of a metaclass
# which provides a very efficient layout for the classes themselves, mostly
# alleviating the need for if/else loops in the class methods.
#
# It is essential to run Restriction with doc string optimisation (-OO
# switch) as the doc strings of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()    # commercial enzymes
NonComm = RestrictionBatch()     # not available commercially
for TYPE, (bases, enzymes) in typedict.items():
    #
    # The keys are the pseudo-types TYPE (stored as type1, type2...)
    # The names are not important and are only present to differentiate
    # the keys in the dict. All the pseudo-types are in fact RestrictionType.
    # These names will not be used afterwards and the pseudo-types are not
    # kept in the locals() dictionary. It is therefore impossible to
    # import them.
    # Now, if you have a look at the dictionary, you will see that not all
    # the types are present, as those without corresponding enzymes have been
    # removed by Dictionary_Builder().
    #
    # The values are tuples which contain
    # as first element a tuple of bases (as strings) and
    # as second element the names of the enzymes.
    #
    # First eval the bases.
    #
    bases = tuple(eval(x) for x in bases)
    #
    # now create the particular value of RestrictionType for the classes
    # in enzymes.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        #
        # Now, we go through all the enzymes and assign them their type.
        # enzymedict[k] contains the values of the attributes for this
        # particular class (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        # we add the enzymes to the corresponding batch.
        #
        # No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)
#
# AllEnzymes is a RestrictionBatch with all the enzymes from Rebase.
#
AllEnzymes = RestrictionBatch(CommOnly)
AllEnzymes.update(NonComm)
#
# Now, place the enzymes in locals so they can be imported.
#
names = [str(x) for x in AllEnzymes]
try:
    del x  # noqa
except NameError:
    # Scoping changed in Python 3, the variable isn't leaked
    pass
locals().update(dict(zip(names, AllEnzymes)))
# The public names are the fixed ones below plus one name per enzyme.
__all__ = ('FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes',
           'CommOnly', 'NonComm') + tuple(names)
# Remove the loop and helper variables from the module namespace.
del k, enzymes, TYPE, bases, names